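/**
 ** Wallace-tree multiplier implementation.
 ** The multiplier unit should also take an input that carries the ROB entry
 ** to which the final result is written back.
**/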

// wallace: two-stage pipelined 32x32 multiplier that returns the low 32 bits
// of the product.
//
//   Stage 1 (combinational): radix-4 Booth recoding of y selects one of
//       {0, +x, -x, +2x, -2x} (x sign-extended to 64 bits) for each of the
//       16 Booth digits and shifts it into position, giving 16 partial
//       products. These are captured in partial_2 on the rising clock edge.
//   Stage 2 (combinational): one wallaceItem instance per bit column reduces
//       the 16 partial products to a sum vector and a carry vector, which
//       adder_64bits adds. The low 32 bits are captured in `result` on the
//       next rising clock edge.
//
// data_ready, save_no_in and rd_rob_in travel through the same two register
// stages, so done / save_no_out / rd_rob_out line up with `result`.
//
// NOTE(review): `rst` and `ctrl` are never referenced in this module, so
// `done` has no reset value and the operation selected by `ctrl` has no
// effect here -- confirm that this is intended.
module wallace (
    input wire clk,
    input wire rst,               // unused in this module (see note above)
    input wire data_ready,        // high when the operands x/y are valid
    input wire [31:0] x,          // multiplicand (sign-extended internally)
    input wire [31:0] y,          // multiplier (Booth-recoded)
    input wire [3:0] ctrl,        // unused in this module (see note above)
    input wire [4:0] save_no_in,  // reservation-station number, passed through
    input wire [4:0] rd_rob_in,   // destination ROB entry, passed through
    output reg done,              // data_ready delayed by two clock edges
    output reg [4:0] save_no_out, // save_no_in delayed by two clock edges
    output reg [4:0] rd_rob_out,  // rd_rob_in delayed by two clock edges
    output reg [31:0] result      // low 32 bits of the product
);

    // Operands of the final 64-bit addition and its result.
    wire [63:0] x_final;
    wire [63:0] y_final;
    wire [63:0] z;

    reg [63:0] partial [0:15];      // stage-1 partial products (combinational)
    reg [63:0] partial_2 [0:15];    // partial products registered for stage 2
    reg [32:0] ext_y;               // y with a 0 appended below bit 0
    reg [63:0] ext_x;               // +x, sign-extended to 64 bits
    reg [63:0] ext_nx;              // -x
    reg [63:0] ext_dx;              // +2x
    reg [63:0] ext_ndx;             // -2x
    reg [63:0] booth_sel;           // multiple of x chosen for the current digit
    reg [4:0] rd_rob_1, rd_rob_2;
    reg [4:0] save_no_1, save_no_2;
    reg done_1, done_2;
    reg [15:0] plusOne, plusOne_2;  // per-digit "+1" correction bits (all zero for now)
    reg [2:0] booth;                // current 3-bit Booth window
    integer i, k;

    // Stage 1: generate the partial products.
    always @(*) begin
        // Append a 0 below y[0] so every Booth window is three adjacent bits.
        ext_y = {y, 1'b0};
        // Precompute X, -X, 2X and -2X as full 64-bit two's-complement values.
        ext_x = {{32{x[31]}}, x};
        ext_nx = ~ext_x + 1;
        ext_dx = ext_x << 1;
        ext_ndx = ~ext_dx + 1;
        // The deferred "+1" correction is disabled for now: negation is done
        // in full above because the deferred approach kept producing wrong
        // results. plusOne is therefore always zero.
        plusOne = 16'b0;
        for (i = 0; i < 16; i = i + 1) begin
            booth = {ext_y[i*2+2], ext_y[i*2+1], ext_y[i*2]};
            case (booth)
                3'b001, 3'b010: booth_sel = ext_x;
                3'b011:         booth_sel = ext_dx;
                3'b100:         booth_sel = ext_ndx;
                3'b101, 3'b110: booth_sel = ext_nx;
                default:        booth_sel = 64'd0;   // 3'b000, 3'b111 and X/Z
            endcase
            // Weight digit i by 4^i (zeros are shifted in from the right).
            partial[i] = booth_sel << (i*2);
        end
        // Sideband signals that accompany the operands.
        rd_rob_1 = rd_rob_in;
        save_no_1 = save_no_in;
        done_1 = data_ready;
    end

    // Pipeline register between stage 1 and stage 2.
    always @(posedge clk) begin
        for (k = 0; k < 16; k = k + 1) begin
            partial_2[k] <= partial[k];
        end

        plusOne_2 <= plusOne;
        rd_rob_2 <= rd_rob_1;
        save_no_2 <= save_no_1;
        done_2 <= done_1;
    end

    // Stage 2: build the Wallace tree.
    wire [13:0] carry [0:64];       // carry[j] feeds column j; carry[64] is left unused
    wire [63:0] wallaceSum;
    wire [63:0] wallaceCarry;
    wire [15:0] nowx [0:63];        // bit j of every partial product
    assign carry[0] = plusOne_2[13:0];
    genvar j;
    // Instantiate 64 column items and chain their carries to form the tree.
    generate
        for (j=0; j<64; j=j+1) begin
            assign nowx[j] = {partial_2[0][j], partial_2[1][j], partial_2[2][j], partial_2[3][j], partial_2[4][j], partial_2[5][j], partial_2[6][j], partial_2[7][j], partial_2[8][j], partial_2[9][j], partial_2[10][j], partial_2[11][j], partial_2[12][j], partial_2[13][j], partial_2[14][j], partial_2[15][j]};
            // wallaceItem is defined elsewhere; presumably a 16-input column
            // compressor -- verify against its definition.
            wallaceItem item(.x(nowx[j]), .carry_pre(carry[j]), .sum(wallaceSum[j]), .carry(wallaceCarry[j]), .carry_next(carry[j+1]));
        end
    endgenerate

    // Final addition: the carry vector is shifted left by one bit (bit 0 is
    // filled from plusOne_2[14]) and added to the sum vector, with
    // plusOne_2[15] as carry-in.
    assign x_final = {wallaceCarry[62:0], plusOne_2[14]};
    assign y_final = wallaceSum;
    adder_64bits add_final(.a(x_final), .b(y_final), .cin(plusOne_2[15]), .result(z), .cout(), .over());

    // Output register.
    always @(posedge clk) begin
        done <= done_2;
        result <= z[31:0];     // keep only the low half of the product
        rd_rob_out <= rd_rob_2;
        save_no_out <= save_no_2;
    end
endmodule